# Replication Archive for Generalizability of IR Experiments beyond the U.S.
# Code to produce simulation study estimating power of Ding, Feller and Miratrix (2016) heterogeneity test. 
# This code builds on R code from Coppock (2019), PSRM:
# Generalizing from Survey Experiments Conducted on Mechanical Turk: A Replication Approach

rm(list = ls())
library(randomizr)
library(reshape2)
library(tidyverse)

source("functions.R")

# Simulation --------------------------------------------------------------

# The simulation takes a very long time to run, if you wish to skip it, you can simply load end results.

 sims <- 100
 Ns <- seq(200, 3000, 100)
 het_params <- seq(0, .5, .05)
 powers_mat <- matrix(NA, ncol = length(het_params), nrow = length(Ns))
 
 for(k in seq_along(Ns)){
   N <- Ns[k]
   for(j in seq_along(het_params)){
     sig <- rep(NA, sims)
     pb <- txtProgressBar(min = 0, max = sims, style = 3)
     for(i in 1:sims){
       X <- rnorm(N)
       Y0 <- X
       Y1 <- Y0 + het_params[j]*X
       Z <- complete_ra(N = N)
       Y <- Y0*(1-Z) + Y1*Z
       test <- homogenous_fx_binary(Y, Z, sims = 100, verbose = FALSE)
       sig[i] <- test$summary_df$max_pval <= 0.05
       setTxtProgressBar(pb, i)
     }
     powers_mat[k,j] <- mean(sig)
     print(paste0("N = ", Ns[k], " and sd(tau) = ", het_params[j], "."))
   }
 }
 close(pb)
 
 df <- melt(powers_mat)
 df$n <- Ns[df$Var1]
 df$het_param <- het_params[df$Var2]

 save(powers_mat, df, file = "sim.rdata")

 # Load end results --------------------------------------------------------------
 
load(file = "sim.rdata")

df <- within(df,{
  n_per_arm <- n/2
})
unique(df$het_param)

# Plot and save --------------------------------------------------------------

g <-
  ggplot(df, aes(x = n_per_arm, y = value, group = het_param, color = het_param)) + 
  geom_line() +
  #geom_vline(xintercept = 500, linetype = "dashed") + 
  geom_hline(yintercept = 0.8, linetype = "dashed") + 
  annotate("text", x = 1200, y = 0.85, label = "sigma[tau] %~~% 0.15", parse = TRUE) +
  scale_y_continuous(breaks = seq(0, 1, .2)) +
  xlab("Number of Subjects Per Arm") +
  ylab("Probability of Rejecting Null of Homogeneity") +
  scale_color_gradient(low = "gainsboro", high = "black", guide = guide_colorbar(bquote(sigma[tau]))) +
  theme_bw() +
  theme(legend.key.height = unit(6, "lines"))

ggsave(filename = "../figures/appendix/sim_figure.pdf", g, height = 7, width = 7)
